import pandas as pd


def breakpoints(df, quantiles, groupby_cols=None, breakpoint_groups=None):
    """
    Calculate quantile breakpoints for one variable, optionally within groups.

    The input data frame is never modified.

    Parameters:
    -----------
    df: pandas.DataFrame
        Input data frame.
    quantiles: dictionary
        Must contain exactly one entry. The key names the data frame column that is used to calculate breakpoints,
        the value lists the breakpoint quantiles that are included in the output. Example: {'beme': [0.3, 0.7]}.
    groupby_cols: string or list, optional
        Groupby variable(s). Breakpoints are calculated separately for each group.
        Typically date, but can also include e.g. industry identifier.
        None or an empty list/tuple means that breakpoints are calculated over the whole (filtered) sample.
    breakpoint_groups: dictionary, optional
        If defined, must contain exactly one entry; breakpoints are calculated only among observations whose value
        in the key column is one of the listed values.
        Example: if we want to calculate breakpoints among NYSE firms, we set
        breakpoint_groups={'exchcd': [1]}

    Returns:
    --------
    pandas.DataFrame
        With grouping: one row per group, holding the groupby column(s) followed by one column per requested
        quantile (the column label is the quantile value itself, e.g. 0.3).
        Without grouping: a single row with one column per requested quantile.
    """
    assert len(quantiles) == 1, 'quantiles must contain exactly one variable'
    var_name, probs = next(iter(quantiles.items()))

    # No defensive copy is needed: df is only filtered (which builds a new frame) and read from.
    if breakpoint_groups:
        assert len(breakpoint_groups) == 1, 'breakpoint_groups must contain exactly one column'
        group_col, allowed_values = next(iter(breakpoint_groups.items()))
        df = df[df[group_col].isin(allowed_values)]

    # An empty list of groupby columns would make pandas raise; treat it as "no grouping".
    no_grouping = groupby_cols is None or (
        isinstance(groupby_cols, (list, tuple)) and len(groupby_cols) == 0
    )

    if no_grouping:
        # quantile() returns quantiles as rows; transpose so that each quantile becomes a column.
        return df[[var_name]].quantile(probs).T.reset_index(drop=True)

    # The grouped quantile is indexed by (group, quantile); unstack moves the quantile level into columns.
    return df.groupby(groupby_cols)[var_name].quantile(probs).unstack().reset_index()


def sort_portfolios(df, quantiles, groupby_cols=None, panel_col=None, breakpoint_groups=None,
                    numeric_folio_names=False):
    """
    Sort securities into quantile portfolios, and add portfolio assignments back into security data frame as column(s).

    The caller's data frame is not modified; a new data frame is returned.

    Parameters:
    -----------
    df: pandas.DataFrame
        Input data frame with groupby_cols-panel_col as a valid primary key.
    quantiles: dictionary
        Dictionary key(s) define data frame column(s) that are used to create sort portfolios. Dictionary values define
        the breakpoint quantiles, listed in ascending order. Example: {'me': [0.5], 'beme': [0.3, 0.7]}.
        Observations at breakpoints will be assigned to the portfolio above the threshold.
    groupby_cols: string or list, optional
        Name(s) of the df column(s) that contains groupby variable(s). Sorting is done separately for each group.
        Typically date, but can also include e.g. industry identifier.
        None or an empty list/tuple means that a single set of breakpoints is used for all rows.
    panel_col: string or list, optional
        Name(s) of the df column(s) that contains panel variable(s), typically security identifier.
        Not used by the sorting logic itself.
    breakpoint_groups: dictionary, optional
        If defined, calculate breakpoints among observations that belong to these groups.
        Dictionary key defines data frame column that determine relevant groups and the values determine relevant
        group values.
        Example: if we want to calculate breakpoints among NYSE firms, we set breakpoint_groups={'exchcd': [1]}
    numeric_folio_names: boolean, default False
        By default portfolio names are the characteristic name followed by the bucket number,
        ex. beme1, beme2, beme3 for the characteristic 'beme'.
        If numeric_folio_names is True, portfolio names will be integers only referring to the bucket number.

    Returns:
    --------
    pandas.DataFrame
        The input columns plus one '<variable>_portfolio' column per entry in quantiles. Rows whose characteristic
        or breakpoints are missing keep pd.NA as their portfolio. Because the breakpoints are attached with a
        column merge, the original row index is replaced by a default integer index.
    """
    # Normalise "no grouping" so that an empty list does not reach DataFrame.groupby, which would raise.
    if isinstance(groupby_cols, (list, tuple)) and len(groupby_cols) == 0:
        groupby_cols = None

    for variable, bps in quantiles.items():
        bp_frame = breakpoints(df, {variable: bps}, groupby_cols, breakpoint_groups)

        if groupby_cols is not None:
            df = df.merge(bp_frame, on=groupby_cols, how='left', validate='m:1')
        else:
            # Broadcast the single breakpoint row to every observation through a constant join key.
            # assign() works on copies, so the caller's frame never gains a helper column, and the key
            # name is made unique so that an existing column is never overwritten or dropped.
            tmp_key = '__sort_portfolios_key__'
            while tmp_key in df.columns:
                tmp_key += '_'
            df = (
                df.assign(**{tmp_key: 1})
                .merge(bp_frame.assign(**{tmp_key: 1}), on=tmp_key, how='left', validate='m:1')
                .drop(columns=[tmp_key])
            )

        colname = variable + '_portfolio'

        def _valname(bucket):
            return int(bucket) if numeric_folio_names else variable + str(bucket)

        df[colname] = pd.NA
        # Bucket 1 is everything strictly below the lowest breakpoint.
        df.loc[df[variable] < df[bps[0]], colname] = _valname(1)
        # Each later breakpoint overwrites the assignment for rows at or above it, so with ascending
        # breakpoints a row ends up in the bucket of the highest threshold it reaches.
        for n, quantile in enumerate(bps):
            df.loc[df[variable] >= df[quantile], colname] = _valname(n + 2)

        # The merged breakpoint columns are only scaffolding; remove them before the next characteristic.
        df = df.drop(columns=bps)

    return df
